Get data - smooth disturbed and control plots together - AGB growth
Code
library (mgcv); library (data.table); library (gratia); library (marginaleffects); library (ggplot2); library (dplyr); library (tidyr); library (zoo); library (slider); library (here);library (broom)
#####test Paracou####
obs <- read.csv ("G:/My Drive/cesab bioforest/Data/aggregated_data_v9.csv" )
setDT (obs)
obs= obs[obs$ Site== "Paracou" ]
setnames (obs, "Plot" , "subplot" )
obs$ plot= gsub ("_." , "" ,obs$ subplot )
obs <- obs[! Year%in% 1984 : 1990 ]
obs <- dcast (
obs,
subplot + plot + Year ~ variable,
value.var = "value"
)
##### upload data
load ("../clim_by_site.rda" )
clim_all= rbindlist (lapply (clim_by_site[["Paracou" ]], function (x) x$ climate[[1 ]]),
idcol = "plot" )
#"Paracou", "Mbaiki", "Corinto", "SUAS", "Lesong", "Ulu Muda", "Sungai Lalang", "Tene", "Jari"
baseline <- clim_all[year >= 1981 & year <= 2010 ]
clim_stats <- baseline[, .(
mean_tmax = mean (tmax),
sd_tmax = sd (tmax),
mean_vpd = mean (vpd),
sd_vpd = sd (vpd),
mean_srad = mean (srad),
sd_srad = sd (srad),
mean_def = mean (def),
sd_def = sd (def)
), by = month]
clim_all <- merge (clim_all, clim_stats, by = "month" , all.x = TRUE )
clim_all[, ` := ` (
z_tmax = (tmax - mean_tmax) / sd_tmax,
z_vpd = (vpd - mean_vpd) / sd_vpd,
z_srad = (srad - mean_srad) / sd_srad,
z_def = (def - mean_def) / sd_def
)]
#climate anomalies follow inventories. For Paracou, mean per year before 1996, two years after
clim_all[, year_bin : =
ifelse (year <= 1995 ,
year, # keep original year
1997 + ((year - 1996 ) %/% 2 ) * 2 # 2-year bins
)]
census_anom <- clim_all[,.(
mean_z_tmax = mean (z_tmax, na.rm = TRUE ),
mean_z_vpd = mean (z_vpd, na.rm = TRUE ),
mean_z_srad = mean (z_srad, na.rm = TRUE ),
mean_z_def = mean (z_def, na.rm = TRUE )
),
by = .(plot, year_bin)]
#AGB values before 1991 are not consistent
census_anom= census_anom[year_bin>= 1991 ]
merged <- merge (
obs,
census_anom,
by.x = c ("Year" ,"plot" ), by.y= c ("year_bin" ,"plot" )
)
control= c (1 ,6 ,11 ,13 ,14 ,15 )
merged$ Treatment <- ifelse (merged$ plot %in% control, "control" , "disturbed" )
merged$ Treatment <- factor (merged$ Treatment)
merged$ plot <- factor (merged$ plot)
merged$ subplot <- factor (merged$ subplot)
GAM k=5 and k=8
NAs for agb_growth - plots 13,14 and 15 miss data for 1991
Code
merged <- merged %>%
group_by (subplot) %>%
mutate (
fitted_GAM5 = predict (
gam (agb_growth ~ s (Year, k = 5 ), data = cur_data ()),
newdata = cur_data ()
)
) %>%
ungroup ()
p1.1 <- ggplot (merged, aes (Year, agb_growth - fitted_GAM5)) +
geom_point () +
ggtitle ("GAM5 residuals" )
p1.1
Code
ggplot (merged, aes (x = Year, y = fitted_GAM5, color = subplot)) +
geom_line (size = 1 ) + # predicted trajectory
geom_point (data = merged,
aes (x = Year, y = agb_growth, color = subplot),
inherit.aes = FALSE , size = 2 ) + # observed points
facet_wrap (~ subplot, scales = "free_y" )+
theme_bw () +
labs (x = "Year" ,
y = "agb_growth" , title= "GAM-k=5" ) + theme (legend.position = "none" )
GAM at edges - higher standard error, GAM uses many basis functions (Thin Plate Regression Splines (TPRS)) combined to estimate an unknown curve. Instead of minimizing just the residual error the model penalize large curvatures, which can be demonstrated with the standard errors. (for bs=fs, penalty is Shared among the subplots – it is supposed to control for overfitting) **
Code
#see SE at adges
m= gam (agb_growth ~ s (Year, k = 5 ), data = merged)
pred <- predict (m, newdata = merged, se.fit = TRUE )
setDT (merged)
merged[, se_GAM5 : = pred$ se.fit]
ggplot (merged, aes (Year, se_GAM5)) +
geom_point (alpha = 0.3 ) +
geom_smooth (se = FALSE , linewidth = 1.2 ) +
labs (title = "Standard error of GAM smooth over Year" ,
y = "Standard error" )
Error per subplot
Code
ggplot (merged, aes (Year, agb_growth- fitted_GAM5)) +
geom_point (alpha = 0.3 ) +
facet_wrap (~ subplot) +
geom_hline (yintercept = 0 , linetype = 2 ) +
labs (y = "Residual" ,
title = "Directional GAM k=5 errors by subplot" )
Code
bias_by_subplot <- merged %>%
group_by (subplot) %>%
arrange (Year) %>%
summarise (
res = agb_growth - fitted_GAM5,
mean_resid = mean (res, na.rm = TRUE ),
edge_early = mean (res[rank (Year) <= 5 ], na.rm = TRUE ),
edge_late = mean (res[rank (Year, ties.method = "first" , na.last = "keep" ) >
(n_distinct (Year) - 5 )], na.rm = TRUE ),
.groups= "drop"
)
long_bias <- bias_by_subplot %>%
pivot_longer (
cols = c (mean_resid, edge_early, edge_late),
names_to = "period" ,
values_to = "residual"
)
Code
ggplot (long_bias, aes (residual, subplot, color = period)) +
geom_vline (xintercept = 0 , linetype = 2 , alpha = 0.5 ) +
geom_point (size = 3 ) +
labs (
x = "Mean residual (obs - pred)" ,
y = "Subplot" ,
title = "Directional GAM bias by subplot" ,
color = "Period"
) +
theme_minimal ()
Code
#Residuals dont seem to display a systematic edge bias:no over-prediction at the beginning of the time series, no under-prediction at the end
GAM k=8
Code
merged <- merged %>%
group_by (subplot) %>%
mutate (
fitted_GAM8 = predict (
gam (agb_growth ~ s (Year, k = 8 ), data = cur_data ()),
newdata = cur_data ()
)
)%>%
ungroup ()
p11.1 <- ggplot (merged, aes (Year, agb_growth - fitted_GAM8)) +
geom_point () +
ggtitle ("GAM8 residuals" )
p11.1
Code
p11= ggplot (merged, aes (x = Year, y = fitted_GAM8, color = subplot)) +
geom_line (size = 1 ) + # predicted trajectory
geom_point (data = merged,
aes (x = Year, y = agb_growth, color = subplot),
inherit.aes = FALSE , size = 2 ) + # observed points
facet_wrap (~ subplot, scales = "free_y" )+
theme_bw () +
labs (x = "Year" ,
y = "agb_growth" , title= "GAM-k=8" ) + theme (legend.position = "none" )
p11
Code
m= gam (agb_growth ~ s (Year, k = 8 ), data = merged)
pred <- predict (m, newdata = merged, se.fit = TRUE )
setDT (merged)
merged[, se_GAM8 : = pred$ se.fit]
ggplot (merged, aes (Year, se_GAM8)) +
geom_point (alpha = 0.3 ) +
geom_smooth (se = FALSE , linewidth = 1.2 ) +
labs (title = "Standard error of GAM smooth over Year" ,
y = "Standard error" )
Error per subplot
Code
ggplot (merged, aes (Year, agb_growth- fitted_GAM8)) +
geom_point (alpha = 0.3 ) +
facet_wrap (~ subplot) +
geom_hline (yintercept = 0 , linetype = 2 ) +
labs (y = "Residual" ,
title = "Directional GAM errors by subplot" )
Code
bias_by_subplot <- merged %>%
group_by (subplot) %>%
arrange (Year) %>%
summarise (
res = agb_growth - fitted_GAM8,
mean_resid = mean (res, na.rm = TRUE ),
edge_early = mean (res[rank (Year) <= 5 ], na.rm = TRUE ),
edge_late = mean (res[rank (Year, ties.method = "first" , na.last = "keep" ) >
(n_distinct (Year) - 5 )], na.rm = TRUE ),
.groups= "drop"
)
long_bias <- bias_by_subplot %>%
pivot_longer (
cols = c (mean_resid, edge_early, edge_late),
names_to = "period" ,
values_to = "residual"
)
Code
ggplot (long_bias, aes (residual, subplot, color = period)) +
geom_vline (xintercept = 0 , linetype = 2 , alpha = 0.5 ) +
geom_point (size = 3 ) +
labs (
x = "Mean residual (obs - pred)" ,
y = "Subplot" ,
title = "Directional GAM bias by subplot" ,
color = "Period"
) +
theme_minimal ()
slider
complete=TRUE (reject incomplete windows, Per row: will keep 5 observations- in initial inventories = 5 years, after 1995 = 10 years..
Code
merged <- merged %>%
group_by (subplot) %>%
arrange (Year) %>%
mutate (fitted_slide_complete = slide_dbl (agb_growth, mean, .before = 2 , .after = 2 , .complete= TRUE ))%>%
ungroup ()
p3.1 <- ggplot (merged, aes (Year, agb_growth - fitted_slide_complete)) +
geom_point () +
ggtitle ("slide residuals" )
p3.1
Code
p3= ggplot (merged, aes (x = Year, y = fitted_slide_complete, color = subplot)) +
geom_line (size = 1 ) + # predicted trajectory
geom_point (data = merged,
aes (x = Year, y = agb_growth, color = subplot),
inherit.aes = FALSE , size = 2 ) + # observed points
facet_wrap (~ subplot, scales = "free_y" )+
theme_bw () +
labs (x = "Year" ,
y = "agb_growth" , title= "slide - .before = 2, .after = 2, window per observation" )+ theme (legend.position = "none" )
p3
Error per subplot
Code
ggplot (merged, aes (Year, agb_growth- fitted_slide_complete)) +
geom_point (alpha = 0.3 ) +
facet_wrap (~ subplot) +
geom_hline (yintercept = 0 , linetype = 2 ) +
labs (y = "Residual" ,
title = "Directional slide errors by subplot" )
Code
bias_by_subplot <- merged %>%
group_by (subplot) %>%
arrange (Year) %>%
summarise (
res = agb_growth - fitted_slide_complete,
mean_resid = mean (res, na.rm = TRUE ),
edge_early = mean (res[rank (Year) <= 5 ], na.rm = TRUE ),
edge_late = mean (res[rank (Year, ties.method = "first" , na.last = "keep" ) >
(n_distinct (Year) - 5 )], na.rm = TRUE ),
.groups= "drop"
)
long_bias <- bias_by_subplot %>%
pivot_longer (
cols = c (mean_resid, edge_early, edge_late),
names_to = "period" ,
values_to = "residual"
)
**inherent of the method?
Code
ggplot (long_bias, aes (residual, subplot, color = period)) +
geom_vline (xintercept = 0 , linetype = 2 , alpha = 0.5 ) +
geom_point (size = 3 ) +
labs (
x = "Mean residual (obs - pred)" ,
y = "Subplot" ,
title = "Directional slide bias by subplot" ,
color = "Period"
) +
theme_minimal ()
Code
#Residuals display a systematic edge bias: over-prediction at the beginning of the time series and under-prediction at the end, #consistent with boundary effects in penalized smoothers.
Differences on window sizes among methods
Code
merged <- merged %>% arrange (subplot, Year)
merged <- merged %>%
group_by (subplot) %>%
mutate (
# ----------------------------
# 1) Sliding by ROWS
# ----------------------------
slide_row = slide_dbl (
agb_growth,
mean,
.before = 2 ,
.after = 2 ,
.complete = TRUE
),
n_row = slide_int (
agb_growth,
~ length (.x),
.before = 2 ,
.after = 2 ,
.complete = TRUE
),
# ----------------------------
# 2) Sliding by YEARS
# ----------------------------
slide_year = slide_index_dbl (
.x = agb_growth,
.i = Year,
.f = mean,
.before = 2 ,
.after = 2 ,
.complete = TRUE
),
n_year = slide_index_int (
.x = agb_growth,
.i = Year,
.f = ~ length (.x),
.before = 2 ,
.after = 2 ,
.complete = TRUE
),
# ----------------------------
# 2) Sliding by YEARS complete=F
# ----------------------------
slide_year_false = slide_index_dbl (
.x = agb_growth,
.i = Year,
.f = mean,
.before = 2 ,
.after = 2 ,
.complete = FALSE
),
n_year_false = slide_index_int (
.x = agb_growth,
.i = Year,
.f = ~ length (.x),
.before = 2 ,
.after = 2 ,
.complete = FALSE
)
) %>%
ungroup ()
ggplot (
filter (merged, subplot == "1_1" ),
aes (x = Year)
) +
geom_line (aes (y = n_row), linewidth = 1 ) +
geom_line (aes (y = n_year), linetype = "dashed" , linewidth = 1 ) +
geom_line (aes (y = n_year_false), color = "red" ) +
labs (
y = "Points per window" ,
title = "ex.: Subplot 1_1" ,
subtitle = "Solid = row sliding | Dashed = year sliding | Red = year sliding complete=F"
)
Code
#plot(merged$slide_year_false, merged$slide_year)
complete=TRUE (reject incomplete windows Per year: will keep 5 observations- in initial inventories = 5 years, after 1995 = 5 years (but with INCOMPLETE windows)..
Code
merged <- merged %>%
group_by (subplot) %>%
arrange (Year) %>%
mutate (fitted_slide_completeYear = slide_index_dbl (agb_growth, Year, mean, .before = 2 , .after = 2 , .complete= TRUE ))%>%
ungroup ()
p33.1 <- ggplot (merged, aes (Year, agb_growth - fitted_slide_completeYear)) +
geom_point () +
ggtitle ("slide residuals" )
p33.1
Code
p33= ggplot (merged, aes (x = Year, y = fitted_slide_completeYear, color = subplot)) +
geom_line (size = 1 ) + # predicted trajectory
geom_point (data = merged,
aes (x = Year, y = agb_growth, color = subplot),
inherit.aes = FALSE , size = 2 ) + # observed points
facet_wrap (~ subplot, scales= "free_y" )+
theme_bw () +
labs (x = "Year" ,
y = "agb_growth" , title= "slide - .before = 2, .after = 2, window per year" )+ theme (legend.position = "none" )
p33
Error per subplot
Code
ggplot (merged, aes (Year, agb_growth- fitted_slide_completeYear)) +
geom_point (alpha = 0.3 ) +
facet_wrap (~ subplot) +
geom_hline (yintercept = 0 , linetype = 2 ) +
labs (y = "Residual" ,
title = "Directional slide errors by subplot" )
complete=FALSE, per Year no NAs, use smaller windows at edges
Code
merged <- merged %>%
group_by (subplot) %>%
arrange (Year) %>%
mutate (fitted_slide = slide_dbl (agb_growth, mean, .before = 2 , .after = 2 , .complete= TRUE ))%>%
ungroup ()
p333.1 <- ggplot (merged, aes (Year, agb_growth - fitted_slide)) +
geom_point () +
ggtitle ("slide residuals" )
p333.1
Code
p333= ggplot (merged, aes (x = Year, y = fitted_slide, color = subplot)) +
geom_line (size = 1 ) + # predicted trajectory
geom_point (data = merged,
aes (x = Year, y = agb_growth, color = subplot),
inherit.aes = FALSE , size = 2 ) + # observed points
facet_wrap (~ subplot, scales = "free_y" )+
theme_bw () +
labs (x = "Year" ,
y = "agb_growth" , title= "slide - .before = 2, .after = 2 - partial windows at edges" )+ theme (legend.position = "none" )
p333
Per year is more wiggly, more bias:
Code
ggplot (
merged,
aes (x = Year)
) +
geom_line (aes (y = slide_row), linewidth = 1 ) +
geom_line (aes (y = slide_year), linetype = "dashed" , linewidth = 1 ) +
geom_line (aes (y = slide_year_false), color = "red" )+
facet_wrap (~ subplot, scales = "free_y" )+
labs (
y = "Points per window" ,
title = "Solid = row sliding | Dashed = year sliding | Red = year sliding complete=F"
)
Most opposed models, GAM k=5 (more smooth), slide per year (more wiggly)
Code
ggplot (
merged,
aes (x = Year)
) +
geom_line (aes (y = fitted_GAM5), linewidth = 1 ) +
geom_line (aes (y = fitted_GAM8), color = "green" )+
geom_line (aes (y = slide_year), color = "red" )+
geom_line (aes (y = slide_row), color = "orange" )+
facet_wrap (~ subplot, scales = "free_y" )+
labs (
y = "Points per window" ,
title = "Black = fitted_GAM5 | Green = fitted_GAM8 | Red = year sliding | Orange = row sliding"
)
root mean square error
Code
rmse_results <- merged %>%
group_by (subplot) %>%
summarise (
RMSE_GAM8 = sqrt (mean ((agb_growth - fitted_GAM8)^ 2 , na.rm = TRUE )),
RMSE_GAM5 = sqrt (mean ((agb_growth - fitted_GAM5)^ 2 , na.rm = TRUE )),
RMSE_SLIDE = sqrt (mean ((agb_growth - fitted_slide)^ 2 , na.rm = TRUE )),
RMSE_SLIDE_COMPLETE = sqrt (mean ((agb_growth - fitted_slide_complete)^ 2 , na.rm = TRUE )),
RMSE_SLIDE_COMPLETE_YEAR = sqrt (mean ((agb_growth - fitted_slide_completeYear)^ 2 , na.rm = TRUE )),
) %>%
ungroup ()
rmse_long <- rmse_results %>%
pivot_longer (
cols = starts_with ("RMSE" ),
names_to = "Model" ,
values_to = "RMSE"
)
rmse_long <- rmse_long %>%
mutate (Model = reorder (Model, RMSE))
ggplot (rmse_long, aes (x = Model, y = RMSE)) +
geom_boxplot () +
theme_bw () +
theme (
axis.text.x = element_text (angle = 45 , hjust = 1 , size = 10 )
)
lm
Code
vars <- grep ("^fitted_" , names (merged), value = TRUE )
results <- data.frame ()
for (v in vars) {
mod <- lm (as.formula (paste0 ("agb_growth - " , v, " ~ Year" )), data = merged)
tmp <- tidy (mod, conf.int = TRUE ) %>% # IC95%
mutate (method = v)
results <- bind_rows (results, tmp)
}
effects <- results %>%
filter (term == "Year" )
ggplot (effects,
aes (x = estimate,
y = reorder (method, estimate))) +
geom_point (size = 2 ) +
geom_errorbarh (aes (xmin = conf.low,
xmax = conf.high),
height = 0.2 ) +
geom_vline (xintercept = 0 ,
linetype = "dashed" ) +
labs (x = "Effet de Year (slope ± IC95%)" ,
y = "Méthode" ,
title = "Year effect" ) +
theme_minimal ()
temporal signal on residuals
Code
methods <- c (
"fitted_GAM5" ,
"fitted_GAM8" ,
"fitted_slide" ,
"fitted_slide_completeYear" ,
"fitted_slide_complete"
)
for (m in methods) {
merged[[paste0 ("resid_" , m)]] <- merged$ agb_growth - merged[[m]]
}
pairwise_corr <- function (df, col) {
wide <- tidyr:: pivot_wider (df[, c ("subplot" ,"Year" , col)],
names_from = subplot, values_from = all_of (col))
M <- as.matrix (wide[, - 1 ]) # remove Year
C <- cor (M, use = "pairwise.complete.obs" )
mean (C[upper.tri (C)], na.rm = TRUE )
}
temporal_signal <- data.frame (
method = methods,
mean_pairwise_corr = sapply (paste0 ("resid_" , methods),
function (x) pairwise_corr (merged, x))
)
ggplot (temporal_signal, aes (x = method, y = mean_pairwise_corr)) +
geom_boxplot () +
theme_bw () +
theme (
axis.text.x = element_text (angle = 45 , hjust = 1 , size = 10 )
)
Testing full models
**GAM k=5 and slide_complete per Year
Code
##the right GAM model:
merged <- merged %>%
mutate (
fitted_GAM5_full = predict (
gam (agb_growth ~ subplot + s (Year,by = subplot, k = 5 ), data = cur_data ()),
newdata = cur_data ()
)
)
plot (merged$ fitted_GAM5, merged$ fitted_GAM5_full)
Code
mod_gam <- gam (
agb_growth ~
subplot +
s (Year, by = subplot, k = 5 ) +
mean_z_tmax: Treatment,
data = merged,
method = "REML"
)
#gam.check(mod_gam)
#summary(mod_gam)
mod_lm_gam <- lm (
agb_growth- fitted_GAM5 ~ mean_z_tmax: Treatment,
data = merged,
method = "REML"
)
#summary(mod_lm_gam)
mod_lm_slide <- lm (
agb_growth- fitted_slide_completeYear ~ mean_z_tmax: Treatment,
data = merged,
method = "REML"
)
#summary(mod_lm_slide)
term_pattern <- "^mean_z_tmax:Treatment"
table_results <- bind_rows (
# ---- GAM ----
broom.mixed:: tidy (mod_gam, parametric = TRUE ) %>%
filter (grepl (term_pattern, term)) %>%
mutate (
Model = "Full GAM" ,
Fit_stat = paste0 ("Dev expl = " ,
round (summary (mod_gam)$ dev.expl * 100 , 1 ), "%" )
),
# ---- LM on GAM residuals ----
tidy (mod_lm_gam, conf.int = TRUE ) %>%
filter (grepl (term_pattern, term)) %>%
mutate (
Model = "LM on GAM residuals" ,
Fit_stat = paste0 ("Adj R2 = " ,
round (summary (mod_lm_gam)$ adj.r.squared, 3 ))
),
# ---- LM on SLIDE residuals ----
tidy (mod_lm_slide, conf.int = TRUE ) %>%
filter (grepl (term_pattern, term)) %>%
mutate (
Model = "LM on SLIDE residuals" ,
Fit_stat = paste0 ("Adj R2 = " ,
round (summary (mod_lm_slide)$ adj.r.squared, 3 ))
)
)
final_table <- table_results %>%
transmute (
Model,
Term = term,
Estimate = round (estimate, 3 ),
SE = round (std.error, 3 ),
p_value = format.pval (p.value, digits = 3 ),
Fit_stat
)
final_table
# A tibble: 6 × 6
Model Term Estimate SE p_value Fit_stat
<chr> <chr> <dbl> <dbl> <chr> <chr>
1 Full GAM mean_z_tmax:Treatmentco… -0.115 0.089 0.19851 Dev exp…
2 Full GAM mean_z_tmax:Treatmentdi… -0.155 0.072 0.03294 Dev exp…
3 LM on GAM residuals mean_z_tmax:Treatmentco… -0.057 0.057 0.31885 Adj R2 …
4 LM on GAM residuals mean_z_tmax:Treatmentdi… -0.078 0.047 0.09788 Adj R2 …
5 LM on SLIDE residuals mean_z_tmax:Treatmentco… -0.205 0.071 0.00385 Adj R2 …
6 LM on SLIDE residuals mean_z_tmax:Treatmentdi… -0.187 0.059 0.00157 Adj R2 …